from IPython.core.display import display, HTML
display(HTML("""<style> .container {width:96% !important;}</style>"""))
from IPython.display import IFrame
import pandas as pd
import numpy as np
from plotly.offline import init_notebook_mode, iplot
import cufflinks as cf
init_notebook_mode()
cf.go_offline()
from __future__ import division
import sys
sys.path.insert(0,'../')
from utils.paths import *
(Small Business Administration)

SBA acts much like an insurance provider to reduce the risk for a bank by taking on some of the risk through guaranteeing a portion of the loan.

nat = pd.read_csv(path_SBA + 'SBAnational_new.csv', sep = ';', low_memory=False)
nat5 = nat[nat.ApprovalFY.isin([2005])].reset_index(drop = True)
Let's assign grades for SBA based on the SBA ratio, i.e. grade 5 for below 0.2, grade 4 for ratio between 0.2 to 0.4
sba_ratio_th = [0, 0.2, 0.4, 0.6, 0.8, 1]
sba_grades = [5, 4, 3, 2, 1]
nat5['SBA_grades'] = pd.cut(nat5.SBA_ratio, bins = sba_ratio_th, labels = sba_grades)
nat5.SBA_grades.value_counts().sort_index(ascending = False)
def default_cat_table(data, cat):
default_cat = data.groupby([cat, 'default']).count().max(1).unstack()
default_cat['ALL'] = data[cat].value_counts()
default_cat['default_rate'] = (default_cat[1] / default_cat['ALL'])
default_cat = default_cat.rename(columns = {1: 'Default', 0: 'Non-default'})
return default_cat
default_cat_table(nat5, 'SBA_grades').sort_index(ascending = False)
def sba_claim(chgoffamount, sba_app):
if chgoffamount == 0:
return 0
else:
if sba_app <= chgoffamount:
claim = sba_app
else:
claim = chgoffamount
return claim
claim = nat5.apply(lambda x: sba_claim(x['ChgOffPrinGr'], x['SBA_Appv']), axis = 1)
nat5['SBA_claim'] = claim
print 'Overall lost ratio = ', round(nat5.SBA_claim.sum() / nat5.SBA_Appv.sum(), 3)
print 'Lost ratio for each grade: '
SBA_grade_df = pd.DataFrame(nat5.groupby('SBA_grades').GrAppv.sum())
SBA_grade_df['no. of cases'] = nat5.SBA_grades.value_counts()
SBA_grade_df['SBA_Appv'] = nat5.groupby('SBA_grades').SBA_Appv.sum()
SBA_grade_df['SBA_claim'] = nat5.groupby('SBA_grades').SBA_claim.sum()
SBA_grade_df['SBA_loss_ratio'] = SBA_grade_df.SBA_claim / SBA_grade_df.SBA_Appv
SBA_grade_df.sort_index(ascending = False)
nat5.head()
projection = pd.read_csv(path_SBA + 'result_table_proj.csv', sep = ';', low_memory=False)
projection = projection.rename(columns = {'Grade': 'ML_grades'})
nat5['ML_grades'] = projection['ML_grades']
def sba_ratio_ml(grade):
new_ratio = {1: 1,
2: 0.8,
3: 0.6,
4: 0.4,
5: 0.2}
return new_ratio[grade]
nat5.loc[:, 'ML_SBA_ratio'] = nat5.ML_grades.apply(sba_ratio_ml)
nat5.loc[:, 'ML_SBA_Appv'] = nat5.GrAppv * nat5.ML_SBA_ratio
# claim if using ML grades
nat5.loc[:, 'ML_SBA_claim'] = nat5.apply(lambda x: sba_claim(x['ChgOffPrinGr'], x['ML_SBA_Appv']), axis = 1)
nat5.groupby('ML_grades').ML_SBA_claim.sum()
nat5[['SBA_claim', 'ML_SBA_claim']].sum()
print 'Total insured amount = ', nat5.ML_SBA_Appv.sum()
print 'Total claim = ', nat5.ML_SBA_claim.sum()
print 'Overall lost ratio after using ML grades = ', round(nat5.ML_SBA_claim.sum() / nat5.ML_SBA_Appv.sum(), 3)
nat5.head()
print 'Lost ratio for each grade: '
ML_SBA_grade_df = pd.DataFrame(nat5.groupby('ML_grades').GrAppv.sum())
ML_SBA_grade_df['no. of cases'] = nat5.ML_grades.value_counts()
ML_SBA_grade_df['ML_SBA_Appv'] = nat5.groupby('ML_grades').ML_SBA_Appv.sum()
ML_SBA_grade_df['ML_SBA_claim'] = nat5.groupby('ML_grades').ML_SBA_claim.sum()
ML_SBA_grade_df['ML_SBA_loss_ratio'] = ML_SBA_grade_df.ML_SBA_claim / ML_SBA_grade_df.ML_SBA_Appv
ML_SBA_grade_df.sort_index(ascending = True)
nat5[['SBA_Appv', 'ML_SBA_Appv']].sum().iplot(kind = 'bar', title = 'Total insured amount', yTitle = 'USD',
dimensions = [600, 400])
nat5[['SBA_claim', 'ML_SBA_claim']].sum().iplot(kind = 'bar', title = 'Total claim amount', yTitle = 'USD',
dimensions = [600, 400])
# Saving 25.5 million
nat5[['SBA_claim', 'ML_SBA_claim']].sum().diff() / 10e6
(np.array(nat5[['SBA_claim', 'ML_SBA_claim']].sum()) / nat5[['SBA_Appv', 'ML_SBA_Appv']].sum()).iplot(kind = 'bar', title = 'Loss ratio', yTitle = 'loss ratio',
dimensions = [600, 400])
# Improve loss ratio by ~ 3%
(np.array(nat5[['SBA_claim', 'ML_SBA_claim']].sum()) / nat5[['SBA_Appv', 'ML_SBA_Appv']].sum()).diff()
display(SBA_grade_df, ML_SBA_grade_df)
def field_comparsion(var, SBA = SBA_grade_df, ML = ML_SBA_grade_df):
var1 = var.replace('ML_', '')
df1 = pd.DataFrame(SBA[var1]).reset_index().sort_values('SBA_grades', ascending = False)
df1 = df1.rename(columns = {'SBA_grades': 'Grades', var1: 'SBA'})
df1 = df1.reset_index(drop = True)
df2 = pd.DataFrame(ML[var]).reset_index()
df2 = df2.rename(columns = {'ML_grades': 'Grades', var: 'ML'})
df = pd.merge(df1, df2, on = 'Grades').set_index('Grades')
return df
field_comparsion('no. of cases').iplot(kind = 'bar', title = 'no. of cases')
field_comparsion('ML_SBA_loss_ratio').iplot(kind = 'bar', title = 'Loss ratio')
field_comparsion('ML_SBA_claim').iplot(kind = 'bar', title = 'Claims')